/*******************************************************************************
  ARTICLE	GAY, GOBBI, GONI (2025) "REVOLUTIONARY TRANSITIONS. INHERITANCE    
            CHANGE AND FERTILITY DECLINE" JOURNAL OF POLITICAL ECONOMY         
                                                                               
  AUTHORS	VICTOR GAY, PAULA GOBBI, MARC GONI                                 
  CONTACT	victor.gay@tse-fr.eu; paula.eugenia.gobbi@ulb.be; marc.goni@uib.no 
  VERSION	1.0 (MAY 2025)                                                     
  SOFTWARE	STATA SE 18                                                        
  LICENCE	MIT                                                                
--------------------------------------------------------------------------------

GENI RESULTS PAPER DO FILE

This file contains the code to reproduce the figures and tables in the paper using the Geni data.

Instructions: 
-------------
	Gain access to the Geni database from MyHeritage, Ltd and either:
	
	1) place the following files:

		geni_profiles.csv (20.2 GB, created on April 4, 2022, at 11:39:31),
		geni_unions.csv (7.2 GB, created on April 4, 2022, at 11:55:59),
		geni_union details.csv (2.6 GB, created on April 4, 2022, at 11:54:34).
	
	in folder /1_raw_data/1_1_henri/ (see README for more details) and run R-codes 
	named 1-* to 8-* in folder "/2_scripts/2_1_data/01_geni_data_to_sample"; or
	
	2) place the author-provided fr-clean.csv file into the folder 3_outputs/3_1_datasets.
 
	After 1) or 2), run do-files 02_geni-inheritance-and-controls.do and 03_geni-final-data-prep.do, in that order.
	
	Open do-files from directory where they are placed; order matters; run whole code.
	
Contents: 
---------
	Program setup
	Figure 5: Fertility and distance to the inheritance border.
	Table 4: Spatial regression-discontinuity estimates, Geni database.

Date last update: May 2025; Ran using STATA 18.5
*/
********************************************************************************


********************
* 0. PROGRAM SETUP *
********************

* Interpret the commands below under Stata 18 rules, start from an empty
* session, and switch off output paging so the file runs unattended.
version 18
clear all
set more off

************************
* PACKAGE DEPENDENCIES *
************************

* Community-contributed packages installed from SSC. As before, every run tries
* to (re)install each package. The difference is that a failed installation
* (e.g. no network access, SSC unreachable) no longer aborts the whole file when
* the package is already available locally; the run only stops if the package
* cannot be found at all.
foreach pkg in rdrobust reghdfe ftools outreg2 {
	capture noisily ssc install `pkg', replace
	if _rc {
		* Installation failed: check whether a local copy exists.
		capture which `pkg'
		if _rc {
			display as error "package `pkg' is not installed and could not be installed from SSC"
			exit 601
		}
		display as text "note: could not update `pkg' from SSC; using the installed copy"
	}
}

***************
* DIRECTORIES *
***************

* Input/output locations, relative to the directory from which this do-file
* is opened (see the instructions in the header).
global DAT "../../3_outputs/3_1_datasets"
global TAB "../../3_outputs/3_2_main/3_2_1_main_tables"
global FIG "../../3_outputs/3_2_main/3_2_2_main_figures"

* Timer 1 measures the run time of everything below; it is stopped and
* listed at the end of the file.
timer on 1

* ==============================================================================
* Figure 5: Fertility and distance to the inheritance border.
* ------------------------------------------------------------------------------

use "$DAT/final-geni.dta", clear

* Sample: gender "f", birth years 1700-1810, twoplus_flag equal to 1
keep if gender=="f" & inrange(byear, 1700, 1810) & twoplus_flag==1

* Indicator sets for birth year and border segment (passed as covariates to
* rdrobust) and a numeric locality identifier (used for clustering)
quietly {
	tabulate byear, generate(byear_)
	tabulate segment_affected_50, generate(seg_)
}
encode insee_com, generate(ninsee_com)

* Triangular kernel weights for the 16.91 bandwidth: 1 at the border, falling
* linearly to 0 at +/-16.91; 0 outside the bandwidth or when dista is missing
local bw = 16.91
generate wgt_1 = cond(inrange(dista, -`bw', `bw'), 1 - abs(dista)/`bw', 0)

* One panel per value of T01 (0 -> figure5a, 1 -> figure5b). Each iteration
* works on a temporary copy of the data (preserve/restore), so the variables
* created inside the loop (res_nfert, rdplot_*) do not persist.
foreach co in 0 1 {
	local d = 16.91
	preserve
	keep if T01==`co'
	keep if dista>=-`d' & dista<=`d'

	* rdrobust results (shown in the log; nothing from e() is reused below)
	rdrobust nfert dista, c(0) p(1) kernel(triangular) h(`d') all masspoints(adjust) covs(byear_* seg_*) vce(cluster ninsee_com)

	* Outcome net of birth-year and border-segment effects, re-centred at the
	* subsample mean so that the plot stays on the original scale
	summ nfert
	local mdv = `r(mean)'
	qui reg nfert i.byear i.segment_affected_50
	predict res_nfert, residuals
	replace res_nfert = res_nfert+`mdv'

	* Number of bins based on Panel A: selected once (co==0) and stored in
	* globals so that the second panel reuses the same binning
	if `co'==0 {
		qui rdplot res_nfert dista, c(0) p(1) scale(2 2) kernel(triangular) binselect(es)
		global nbinsl = `e(J_star_l)'
		global nbinsr = `e(J_star_r)'
	}

	* rdplot: genvars creates the rdplot_* variables (bin means, fitted values)
	* that the custom graph below is drawn from
	rdplot res_nfert dista, c(0) p(1) kernel(triangular) nbins($nbinsl $nbinsr )  genvars h(`d') ///
		graph_options(xlabel(-15(5)15) ylabel(2(1)5) legend(off) ///
		xtitle("distance to inheritance border", size(large)) ytitle("completed fertility", size(large)) ///
		graphr(lc(white) fc(white)) plotr(lc(black)) aspect(0.4))

	* Fitted values at the observations closest to the border on each side;
	* they are the end points of the arrow marking the jump at the cutoff
	summ dista if dista<0, d
	summ rdplot_hat_y if dista>=`r(max)'-0.0001 & dista<0
	local eff0 = `r(mean)'
	summ dista if dista>0, d
	summ rdplot_hat_y if dista<=`r(min)'+0.0001 & dista>0
	local eff1 = `r(mean)'

	* Panel-specific pieces: file suffix and, for panel b only, the second
	* line of side labels. Everything else is shared by both panels.
	local panel "a"
	local sublabels ""
	if `co'==1 {
		local panel "b"
		local sublabels text(4.63 -9 "(not reformed)", place(0) c(stc1) size(large)) text(4.63 9.5 "(reformed)", place(0) c(stc2) size(large))
	}

	* nicer: weighted linear fits with confidence bands, binned means, rdplot
	* fitted lines, and the arrow at the border
	twoway (lfitci res_nfert dista if inrange(dista,-`d',0) [aw=wgt_1], ciplot(rarea) alc(white) fc(stc1%15) lc(stc1)) ///
		(lfitci res_nfert dista if inrange(dista,0,`d') [aw=wgt_1], ciplot(rarea) alc(white) fc(stc2%15) lc(stc2)) ///
		(scatter rdplot_mean_y rdplot_mean_x if dista<0, sort msiz(small) mc(stc1%50)) ///
		(scatter rdplot_mean_y rdplot_mean_x if inrange(dista,0,`d'), sort msiz(small) mc(stc2%50)) ///
		(line rdplot_hat_y dista if dista<0, sort lc(stc1)) ///
		(line rdplot_hat_y dista if inrange(dista,0,`d'), sort lc(stc2)) ///
		(pcarrowi `eff0' 0 `eff1' 0 (1), lc(black) lw(thick) mlc(black)) ///
		, xlabel(-15(5)15) ylabel(2.5(0.5)5) xline(0, lp(shortdash) lc(black)) legend(off) ///
		xtitle("Distance to inheritance border", size(large)) ytitle("Completed fertility", size(large)) ///
		text(4.9 -9 "Egalitarian", place(0) c(stc1) size(large)) text(4.9 9.5 "Inegalitarian", place(0) c(stc2) size(large)) ///
		`sublabels' ///
		graphr(lc(white) fc(white)) plotr(lc(black)) aspect(0.5) scale(1.4) name(g`co'v3, replace)
	graph export "$FIG/figure5`panel'.jpg", as(jpg) name("g`co'v3") quality(90) replace

	restore
}
graph drop _all
* ==============================================================================


* ==============================================================================
* Table 4: Spatial regression-discontinuity estimates, Geni database.
* ------------------------------------------------------------------------------

use "$DAT/final-geni.dta", clear

* Sample: gender "f", birth years 1700-1810, twoplus_flag equal to 1
keep if gender=="f" & inrange(byear, 1700, 1810) & twoplus_flag==1

* geographic polynomials: distance slopes (dist1), plus squared-distance
* slopes (dist2), each interacted with T01 and affected
global dist1 c.dista#T01#affected
global dist2 $dist1 c.dista_sq#T01#affected

* flexible trends: every control interacted with birth year
global controls c.(cl_peril_i pw1780 ldensity_1793)#byear
foreach v in near_eveche near_socpol near_rebellion near_bailliage near_subdeleg near_recette near_cassini near_post {
	global controls $controls `v'#byear
}

* FE for bw calculation: numeric locality id for clustering and indicator
* sets that can be passed to rdbwselect through covs()
quietly {
	encode insee_com, generate(ninsee_com)
	tabulate byear, generate(byear_)
	tabulate segment_affected_50, generate(segment_affected_50_)
	tabulate bailliage_id, generate(baiFE_)
}


* MSE optimal bandwidth & kernels
* -------------------------------
* For each specification: select the bandwidth with rdbwselect (mserd), keep it
* rounded to two decimals in global da# for the table, and build triangular
* kernel weights wgt_# (1 at the border, 0 at and beyond the bandwidth, 0 when
* dista is missing).

// baseline specifications
* col (1)-(3)
rdbwselect nfert dista, c(0) p(1) vce(cluster ninsee_com) covs(byear_* segment_affected_50_* T01xaffected) kernel(triangular) bwselect(mserd)
local bw = `e(h_mserd)'
global da1 = round(`bw',0.01)
generate wgt_1 = cond(inrange(dista, -`bw', `bw'), 1 - abs(dista)/`bw', 0)

* col (4)
rdbwselect nfert dista, c(0) p(1) vce(cluster ninsee_com) covs(byear_* segment_affected_50_* baiFE_*) kernel(triangular) bwselect(mserd)
local bw = `e(h_mserd)'
global da4 = round(`bw',0.01)
generate wgt_4 = cond(inrange(dista, -`bw', `bw'), 1 - abs(dista)/`bw', 0)

* col (5): the flexible trends are expanded into explicit interaction
* variables (_A*, _B*, _C*, _D#*) so they can be listed in covs()
xi i.byear*cl_peril_i, prefix(_A)
xi i.byear*ldensity_1793, prefix(_B)
xi i.byear*pw1780, prefix(_C)
local t = 0
foreach x in near_eveche near_socpol near_rebellion near_bailliage near_subdeleg near_recette near_cassini near_post {
	local ++t
	xi i.`x'*i.byear, prefix(_D`t')
}
rdbwselect nfert dista, c(0) p(1) vce(cluster ninsee_com) covs(byear_* segment_affected_50_* baiFE_* logpwheat _A* _B* _C* _D*) kernel(triangular) bwselect(mserd) bwcheck(650)
local bw = `e(h_mserd)'
global da5 = round(`bw',0.01)
generate wgt_5 = cond(inrange(dista, -`bw', `bw'), 1 - abs(dista)/`bw', 0)

* col (6)
rdbwselect nfert dista, c(0) p(2) vce(cluster ninsee_com) covs(byear_* segment_affected_50_* baiFE_*) kernel(triangular) bwselect(mserd)
local bw = `e(h_mserd)'
global da6 = round(`bw',0.01)
generate wgt_6 = cond(inrange(dista, -`bw', `bw'), 1 - abs(dista)/`bw', 0)

* col (7)
rdbwselect nfert dista, c(0) p(2) vce(cluster ninsee_com) covs(byear_* segment_affected_50_* baiFE_* logpwheat _A* _B* _C* _D*) kernel(triangular) bwselect(mserd) bwcheck(650)
local bw = `e(h_mserd)'
global da7 = round(`bw',0.01)
generate wgt_7 = cond(inrange(dista, -`bw', `bw'), 1 - abs(dista)/`bw', 0)

* The expanded interaction variables were only needed for bandwidth selection
drop _A* _B* _C* _D*

* RD-DD results
* -------------------------------
* Seven weighted reghdfe regressions, one per column of table4.tex. Each uses
* the kernel weights wgt_# and reports the bandwidth global da# computed in the
* bandwidth section of this file. Only the coefficient on Xvar is exported.

* outreg2 options shared by all columns
local oropts keep(Xvar) nocons tex(fragment) nonote nor2 noobs long dec(3) label

* Table note, defined once so that all columns carry the same text. No alpha()
* or symbol() option is passed to outreg2, so the stars follow its default
* thresholds (10%, 5%, 1%); the legend is written to match them.
local tabnote "Geni h-sample women born 1700-1810 + geolocated, Triangular kernel; SE clustered by locality; *p<.1; **p<.05; ***p<.01"

* NOTE(review): columns (3) and (7) include the main effect of affected as a
* regressor, columns (4)-(6) do not -- confirm that this is intended.

* col (1): T01==0 subsample, Xvar = affected
gen Xvar = affected
label var Xvar "Estimate"
xi: reghdfe nfert Xvar [aw=wgt_1] if T01==0, absorb(byear ${dist1} segment_affected_50) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", replace ///
	ctitle(RD pre-reform) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, ., Flexible trends, ., N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 1, MSE Optimal bandwidth, $da1, Mean dep var, `mdv') ///
	addnote(`tabnote')

* col (2): T01==1 subsample, Xvar = affected
xi: reghdfe nfert Xvar [aw=wgt_1] if T01==1, absorb(byear ${dist1} segment_affected_50) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", ///
	ctitle(RD post-reform) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, ., Flexible trends, ., N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 1, MSE Optimal bandwidth, $da1, Mean dep var, `mdv') ///
	addnote(`tabnote')

* col (3): full sample; from here on Xvar = T01xaffected
replace Xvar = T01xaffected
xi: reghdfe nfert affected Xvar [aw=wgt_1], absorb(byear ${dist1} segment_affected_50) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", ///
	ctitle(RD-DD) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, ., Flexible trends, ., N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 1, MSE Optimal bandwidth, $da1, Mean dep var, `mdv') ///
	addnote(`tabnote')

* col (4): adds bailliage fixed effects and logpwheat
xi: reghdfe nfert Xvar logpwheat [aw=wgt_4], absorb(byear ${dist1} bailliage_id segment_affected_50) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", ///
	ctitle(RD-DD) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, Y, Flexible trends, ., N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 1, MSE Optimal bandwidth, $da4, Mean dep var, `mdv') ///
	addnote(`tabnote')

* col (5): adds the flexible trends in $controls
xi: reghdfe nfert Xvar logpwheat [aw=wgt_5], absorb(byear ${dist1} bailliage_id segment_affected_50 $controls ) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", ///
	ctitle(RD-DD) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, Y, Flexible trends, Y, N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 1, MSE Optimal bandwidth, $da5, Mean dep var, `mdv') ///
	addnote(`tabnote')

* col (6): second-order distance polynomial (dist2), bailliage fixed effects
xi: reghdfe nfert Xvar [aw=wgt_6], absorb(byear ${dist2} bailliage_id segment_affected_50) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", ///
	ctitle(RD-DD) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, Y, Flexible trends, ., N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 2, MSE Optimal bandwidth, $da6, Mean dep var, `mdv') ///
	addnote(`tabnote')

* col (7): second-order distance polynomial plus the flexible trends
xi: reghdfe nfert affected Xvar logpwheat [aw=wgt_7], absorb(byear ${dist2} bailliage_id segment_affected_50 $controls ) cluster(insee_com)
qui summ nfert if e(sample)==1
local mdv = round(`r(mean)',0.01)
outreg2 using "$TAB/table4.tex", ///
	ctitle(RD-DD) `oropts' ///
	addtext(Cohort FE, Y, Border segment FE, Y, Bailliage FE, Y, Flexible trends, Y, N, `e(N)', N clusters, `e(N_clust)', Order polynomial, 2, MSE Optimal bandwidth, $da7, Mean dep var, `mdv') ///
	addnote(`tabnote')
* ==============================================================================

timer off 1 /* 108 seconds */
timer list